1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32 package sun.util.locale;
33
34 import java.util.ArrayList;
35 import java.util.Collections;
36 import java.util.HashMap;
37 import java.util.List;
38 import java.util.Map;
39 import java.util.Set;
40
41 public class LanguageTag {
42
43
44
45 public static final String SEP = "-";
46 public static final String PRIVATEUSE = "x";
47 public static final String UNDETERMINED = "und";
48 public static final String PRIVUSE_VARIANT_PREFIX = "lvariant";
49
50
51
52
53 private String language = "";
54 private String script = "";
55 private String region = "";
56 private String privateuse = "";
57
58 private List<String> extlangs = Collections.emptyList();
59 private List<String> variants = Collections.emptyList();
60 private List<String> extensions = Collections.emptyList();
61
62
63
64
65 private static final Map<String, String[]> GRANDFATHERED = new HashMap<>();
66
67 static {
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99 final String[][] entries = {
100
101 {"art-lojban", "jbo"},
102 {"cel-gaulish", "xtg-x-cel-gaulish"},
103 {"en-GB-oed", "en-GB-x-oed"},
104 {"i-ami", "ami"},
105 {"i-bnn", "bnn"},
106 {"i-default", "en-x-i-default"},
107 {"i-enochian", "und-x-i-enochian"},
108 {"i-hak", "hak"},
109 {"i-klingon", "tlh"},
110 {"i-lux", "lb"},
111 {"i-mingo", "see-x-i-mingo"},
112 {"i-navajo", "nv"},
113 {"i-pwn", "pwn"},
114 {"i-tao", "tao"},
115 {"i-tay", "tay"},
116 {"i-tsu", "tsu"},
117 {"no-bok", "nb"},
118 {"no-nyn", "nn"},
119 {"sgn-BE-FR", "sfb"},
120 {"sgn-BE-NL", "vgt"},
121 {"sgn-CH-DE", "sgg"},
122 {"zh-guoyu", "cmn"},
123 {"zh-hakka", "hak"},
124 {"zh-min", "nan-x-zh-min"},
125 {"zh-min-nan", "nan"},
126 {"zh-xiang", "hsn"},
127 };
128 for (String[] e : entries) {
129 GRANDFATHERED.put(LocaleUtils.toLowerString(e[0]), e);
130 }
131 }
132
133 private LanguageTag() {
134 }
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181 public static LanguageTag parse(String languageTag, ParseStatus sts) {
182 if (sts == null) {
183 sts = new ParseStatus();
184 } else {
185 sts.reset();
186 }
187
188 StringTokenIterator itr;
189
190
191 String[] gfmap = GRANDFATHERED.get(LocaleUtils.toLowerString(languageTag));
192 if (gfmap != null) {
193
194 itr = new StringTokenIterator(gfmap[1], SEP);
195 } else {
196 itr = new StringTokenIterator(languageTag, SEP);
197 }
198
199 LanguageTag tag = new LanguageTag();
200
201
202 if (tag.parseLanguage(itr, sts)) {
203 tag.parseExtlangs(itr, sts);
204 tag.parseScript(itr, sts);
205 tag.parseRegion(itr, sts);
206 tag.parseVariants(itr, sts);
207 tag.parseExtensions(itr, sts);
208 }
209 tag.parsePrivateuse(itr, sts);
210
211 if (!itr.isDone() && !sts.isError()) {
212 String s = itr.current();
213 sts.errorIndex = itr.currentStart();
214 if (s.length() == 0) {
215 sts.errorMsg = "Empty subtag";
216 } else {
217 sts.errorMsg = "Invalid subtag: " + s;
218 }
219 }
220
221 return tag;
222 }
223
224
225
226
227
228 private boolean parseLanguage(StringTokenIterator itr, ParseStatus sts) {
229 if (itr.isDone() || sts.isError()) {
230 return false;
231 }
232
233 boolean found = false;
234
235 String s = itr.current();
236 if (isLanguage(s)) {
237 found = true;
238 language = s;
239 sts.parseLength = itr.currentEnd();
240 itr.next();
241 }
242
243 return found;
244 }
245
246 private boolean parseExtlangs(StringTokenIterator itr, ParseStatus sts) {
247 if (itr.isDone() || sts.isError()) {
248 return false;
249 }
250
251 boolean found = false;
252
253 while (!itr.isDone()) {
254 String s = itr.current();
255 if (!isExtlang(s)) {
256 break;
257 }
258 found = true;
259 if (extlangs.isEmpty()) {
260 extlangs = new ArrayList<>(3);
261 }
262 extlangs.add(s);
263 sts.parseLength = itr.currentEnd();
264 itr.next();
265
266 if (extlangs.size() == 3) {
267
268 break;
269 }
270 }
271
272 return found;
273 }
274
275 private boolean parseScript(StringTokenIterator itr, ParseStatus sts) {
276 if (itr.isDone() || sts.isError()) {
277 return false;
278 }
279
280 boolean found = false;
281
282 String s = itr.current();
283 if (isScript(s)) {
284 found = true;
285 script = s;
286 sts.parseLength = itr.currentEnd();
287 itr.next();
288 }
289
290 return found;
291 }
292
293 private boolean parseRegion(StringTokenIterator itr, ParseStatus sts) {
294 if (itr.isDone() || sts.isError()) {
295 return false;
296 }
297
298 boolean found = false;
299
300 String s = itr.current();
301 if (isRegion(s)) {
302 found = true;
303 region = s;
304 sts.parseLength = itr.currentEnd();
305 itr.next();
306 }
307
308 return found;
309 }
310
311 private boolean parseVariants(StringTokenIterator itr, ParseStatus sts) {
312 if (itr.isDone() || sts.isError()) {
313 return false;
314 }
315
316 boolean found = false;
317
318 while (!itr.isDone()) {
319 String s = itr.current();
320 if (!isVariant(s)) {
321 break;
322 }
323 found = true;
324 if (variants.isEmpty()) {
325 variants = new ArrayList<>(3);
326 }
327 variants.add(s);
328 sts.parseLength = itr.currentEnd();
329 itr.next();
330 }
331
332 return found;
333 }
334
335 private boolean parseExtensions(StringTokenIterator itr, ParseStatus sts) {
336 if (itr.isDone() || sts.isError()) {
337 return false;
338 }
339
340 boolean found = false;
341
342 while (!itr.isDone()) {
343 String s = itr.current();
344 if (isExtensionSingleton(s)) {
345 int start = itr.currentStart();
346 String singleton = s;
347 StringBuilder sb = new StringBuilder(singleton);
348
349 itr.next();
350 while (!itr.isDone()) {
351 s = itr.current();
352 if (isExtensionSubtag(s)) {
353 sb.append(SEP).append(s);
354 sts.parseLength = itr.currentEnd();
355 } else {
356 break;
357 }
358 itr.next();
359 }
360
361 if (sts.parseLength <= start) {
362 sts.errorIndex = start;
363 sts.errorMsg = "Incomplete extension '" + singleton + "'";
364 break;
365 }
366
367 if (extensions.isEmpty()) {
368 extensions = new ArrayList<>(4);
369 }
370 extensions.add(sb.toString());
371 found = true;
372 } else {
373 break;
374 }
375 }
376 return found;
377 }
378
379 private boolean parsePrivateuse(StringTokenIterator itr, ParseStatus sts) {
380 if (itr.isDone() || sts.isError()) {
381 return false;
382 }
383
384 boolean found = false;
385
386 String s = itr.current();
387 if (isPrivateusePrefix(s)) {
388 int start = itr.currentStart();
389 StringBuilder sb = new StringBuilder(s);
390
391 itr.next();
392 while (!itr.isDone()) {
393 s = itr.current();
394 if (!isPrivateuseSubtag(s)) {
395 break;
396 }
397 sb.append(SEP).append(s);
398 sts.parseLength = itr.currentEnd();
399
400 itr.next();
401 }
402
403 if (sts.parseLength <= start) {
404
405 sts.errorIndex = start;
406 sts.errorMsg = "Incomplete privateuse";
407 } else {
408 privateuse = sb.toString();
409 found = true;
410 }
411 }
412
413 return found;
414 }
415
416 public static LanguageTag parseLocale(BaseLocale baseLocale, LocaleExtensions localeExtensions) {
417 LanguageTag tag = new LanguageTag();
418
419 String language = baseLocale.getLanguage();
420 String script = baseLocale.getScript();
421 String region = baseLocale.getRegion();
422 String variant = baseLocale.getVariant();
423
424 boolean hasSubtag = false;
425
426 String privuseVar = null;
427
428 if (isLanguage(language)) {
429
430 if (language.equals("iw")) {
431 language = "he";
432 } else if (language.equals("ji")) {
433 language = "yi";
434 } else if (language.equals("in")) {
435 language = "id";
436 }
437 tag.language = language;
438 }
439
440 if (isScript(script)) {
441 tag.script = canonicalizeScript(script);
442 hasSubtag = true;
443 }
444
445 if (isRegion(region)) {
446 tag.region = canonicalizeRegion(region);
447 hasSubtag = true;
448 }
449
450
451 if (tag.language.equals("no") && tag.region.equals("NO") && variant.equals("NY")) {
452 tag.language = "nn";
453 variant = "";
454 }
455
456 if (variant.length() > 0) {
457 List<String> variants = null;
458 StringTokenIterator varitr = new StringTokenIterator(variant, BaseLocale.SEP);
459 while (!varitr.isDone()) {
460 String var = varitr.current();
461 if (!isVariant(var)) {
462 break;
463 }
464 if (variants == null) {
465 variants = new ArrayList<>();
466 }
467 variants.add(var);
468 varitr.next();
469 }
470 if (variants != null) {
471 tag.variants = variants;
472 hasSubtag = true;
473 }
474 if (!varitr.isDone()) {
475
476 StringBuilder buf = new StringBuilder();
477 while (!varitr.isDone()) {
478 String prvv = varitr.current();
479 if (!isPrivateuseSubtag(prvv)) {
480
481 break;
482 }
483 if (buf.length() > 0) {
484 buf.append(SEP);
485 }
486 buf.append(prvv);
487 varitr.next();
488 }
489 if (buf.length() > 0) {
490 privuseVar = buf.toString();
491 }
492 }
493 }
494
495 List<String> extensions = null;
496 String privateuse = null;
497
498 if (localeExtensions != null) {
499 Set<Character> locextKeys = localeExtensions.getKeys();
500 for (Character locextKey : locextKeys) {
501 Extension ext = localeExtensions.getExtension(locextKey);
502 if (isPrivateusePrefixChar(locextKey)) {
503 privateuse = ext.getValue();
504 } else {
505 if (extensions == null) {
506 extensions = new ArrayList<>();
507 }
508 extensions.add(locextKey.toString() + SEP + ext.getValue());
509 }
510 }
511 }
512
513 if (extensions != null) {
514 tag.extensions = extensions;
515 hasSubtag = true;
516 }
517
518
519 if (privuseVar != null) {
520 if (privateuse == null) {
521 privateuse = PRIVUSE_VARIANT_PREFIX + SEP + privuseVar;
522 } else {
523 privateuse = privateuse + SEP + PRIVUSE_VARIANT_PREFIX
524 + SEP + privuseVar.replace(BaseLocale.SEP, SEP);
525 }
526 }
527
528 if (privateuse != null) {
529 tag.privateuse = privateuse;
530 }
531
532 if (tag.language.length() == 0 && (hasSubtag || privateuse == null)) {
533
534
535
536 tag.language = UNDETERMINED;
537 }
538
539 return tag;
540 }
541
542
543
544
545
546 public String getLanguage() {
547 return language;
548 }
549
550 public List<String> getExtlangs() {
551 if (extlangs.isEmpty()) {
552 return Collections.emptyList();
553 }
554 return Collections.unmodifiableList(extlangs);
555 }
556
557 public String getScript() {
558 return script;
559 }
560
561 public String getRegion() {
562 return region;
563 }
564
565 public List<String> getVariants() {
566 if (variants.isEmpty()) {
567 return Collections.emptyList();
568 }
569 return Collections.unmodifiableList(variants);
570 }
571
572 public List<String> getExtensions() {
573 if (extensions.isEmpty()) {
574 return Collections.emptyList();
575 }
576 return Collections.unmodifiableList(extensions);
577 }
578
579 public String getPrivateuse() {
580 return privateuse;
581 }
582
583
584
585
586
587 public static boolean isLanguage(String s) {
588
589
590
591
592
593 int len = s.length();
594 return (len >= 2) && (len <= 8) && LocaleUtils.isAlphaString(s);
595 }
596
597 public static boolean isExtlang(String s) {
598
599
600 return (s.length() == 3) && LocaleUtils.isAlphaString(s);
601 }
602
603 public static boolean isScript(String s) {
604
605 return (s.length() == 4) && LocaleUtils.isAlphaString(s);
606 }
607
608 public static boolean isRegion(String s) {
609
610
611 return ((s.length() == 2) && LocaleUtils.isAlphaString(s))
612 || ((s.length() == 3) && LocaleUtils.isNumericString(s));
613 }
614
615 public static boolean isVariant(String s) {
616
617
618 int len = s.length();
619 if (len >= 5 && len <= 8) {
620 return LocaleUtils.isAlphaNumericString(s);
621 }
622 if (len == 4) {
623 return LocaleUtils.isNumeric(s.charAt(0))
624 && LocaleUtils.isAlphaNumeric(s.charAt(1))
625 && LocaleUtils.isAlphaNumeric(s.charAt(2))
626 && LocaleUtils.isAlphaNumeric(s.charAt(3));
627 }
628 return false;
629 }
630
631 public static boolean isExtensionSingleton(String s) {
632
633
634
635
636
637
638 return (s.length() == 1)
639 && LocaleUtils.isAlphaString(s)
640 && !LocaleUtils.caseIgnoreMatch(PRIVATEUSE, s);
641 }
642
643 public static boolean isExtensionSingletonChar(char c) {
644 return isExtensionSingleton(String.valueOf(c));
645 }
646
647 public static boolean isExtensionSubtag(String s) {
648
649 int len = s.length();
650 return (len >= 2) && (len <= 8) && LocaleUtils.isAlphaNumericString(s);
651 }
652
653 public static boolean isPrivateusePrefix(String s) {
654
655 return (s.length() == 1)
656 && LocaleUtils.caseIgnoreMatch(PRIVATEUSE, s);
657 }
658
659 public static boolean isPrivateusePrefixChar(char c) {
660 return (LocaleUtils.caseIgnoreMatch(PRIVATEUSE, String.valueOf(c)));
661 }
662
663 public static boolean isPrivateuseSubtag(String s) {
664
665 int len = s.length();
666 return (len >= 1) && (len <= 8) && LocaleUtils.isAlphaNumericString(s);
667 }
668
669
670
671
672
673 public static String canonicalizeLanguage(String s) {
674 return LocaleUtils.toLowerString(s);
675 }
676
677 public static String canonicalizeExtlang(String s) {
678 return LocaleUtils.toLowerString(s);
679 }
680
681 public static String canonicalizeScript(String s) {
682 return LocaleUtils.toTitleString(s);
683 }
684
685 public static String canonicalizeRegion(String s) {
686 return LocaleUtils.toUpperString(s);
687 }
688
689 public static String canonicalizeVariant(String s) {
690 return LocaleUtils.toLowerString(s);
691 }
692
693 public static String canonicalizeExtension(String s) {
694 return LocaleUtils.toLowerString(s);
695 }
696
697 public static String canonicalizeExtensionSingleton(String s) {
698 return LocaleUtils.toLowerString(s);
699 }
700
701 public static String canonicalizeExtensionSubtag(String s) {
702 return LocaleUtils.toLowerString(s);
703 }
704
705 public static String canonicalizePrivateuse(String s) {
706 return LocaleUtils.toLowerString(s);
707 }
708
709 public static String canonicalizePrivateuseSubtag(String s) {
710 return LocaleUtils.toLowerString(s);
711 }
712
713 @Override
714 public String toString() {
715 StringBuilder sb = new StringBuilder();
716
717 if (language.length() > 0) {
718 sb.append(language);
719
720 for (String extlang : extlangs) {
721 sb.append(SEP).append(extlang);
722 }
723
724 if (script.length() > 0) {
725 sb.append(SEP).append(script);
726 }
727
728 if (region.length() > 0) {
729 sb.append(SEP).append(region);
730 }
731
732 for (String variant : variants) {
733 sb.append(SEP).append(variant);
734 }
735
736 for (String extension : extensions) {
737 sb.append(SEP).append(extension);
738 }
739 }
740 if (privateuse.length() > 0) {
741 if (sb.length() > 0) {
742 sb.append(SEP);
743 }
744 sb.append(privateuse);
745 }
746
747 return sb.toString();
748 }
749 }